library(nycflights13)
library(tidyverse)
library(dplyr)

5.2 Filter rows with filter()

5.2.1

Find all flights that:

  1. Had an arrival delay of two or more hours
# arr_delay is recorded in minutes, so "two or more hours" means >= 120.
filter(flights, arr_delay >= 120)
  2. Flew to Houston (IAH or HOU)
# Keep flights whose destination is either of the two Houston airports.
flights %>%
  filter(dest == "IAH" | dest == "HOU")

or

# Houston destinations, expressed as set membership.
flights %>%
  filter(dest %in% c("IAH", "HOU"))
  3. Were operated by United, American, or Delta
# Carrier codes: UA = United, AA = American, DL = Delta.
flights %>%
  filter(carrier %in% c("UA", "AA", "DL"))
  4. Departed in summer (July, August, and September)
# Months 7, 8 and 9 are July, August and September.
flights %>%
  filter(month %in% c(7, 8, 9))
  5. Arrived more than two hours late, but didn’t leave late
# Compare the arrival *delay* (in minutes), not the arrival clock time,
# against two hours; dep_delay <= 0 keeps flights that left on time or early.
filter(flights, arr_delay > 120, dep_delay <= 0)
  6. Were delayed by at least an hour, but made up over 30 minutes in flight
# Left at least 60 minutes late, yet the arrival delay is more than
# 30 minutes smaller than the departure delay.
filter(flights, dep_delay >= 60, dep_delay - arr_delay > 30)
  7. Departed between midnight and 6am (inclusive)
# Inspect the range of dep_time before filtering on it.
summary(flights[["dep_time"]])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
      1     907    1401    1349    1744    2400    8255 
# Midnight is stored as 2400 (see the Max. in the summary), so it is
# included explicitly alongside every departure up to 0600.
flights %>%
  filter(dep_time <= 600 | dep_time == 2400)

5.2.2

Another useful dplyr filtering helper is between(). What does it do? Can you use it to simplify the code needed to answer the previous challenges?

This is a shortcut for x >= left & x <= right
between(vector of values, left boundary, right boundary)

# between() is inclusive on both ends: July through September.
flights %>%
  filter(between(month, 7, 9))

5.2.3

How many flights have a missing dep_time? What other variables are missing? What might these rows represent?

# Tally rows by whether dep_time is missing.
flights %>%
  count(is.na(dep_time))
# Per-column summary; the NA's rows show which variables have missing values.
flights %>%
  summary()
      year          month             day           dep_time    sched_dep_time
 Min.   :2013   Min.   : 1.000   Min.   : 1.00   Min.   :   1   Min.   : 106  
 1st Qu.:2013   1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.: 907   1st Qu.: 906  
 Median :2013   Median : 7.000   Median :16.00   Median :1401   Median :1359  
 Mean   :2013   Mean   : 6.549   Mean   :15.71   Mean   :1349   Mean   :1344  
 3rd Qu.:2013   3rd Qu.:10.000   3rd Qu.:23.00   3rd Qu.:1744   3rd Qu.:1729  
 Max.   :2013   Max.   :12.000   Max.   :31.00   Max.   :2400   Max.   :2359  
                                                 NA's   :8255                 
   dep_delay          arr_time    sched_arr_time   arr_delay       
 Min.   : -43.00   Min.   :   1   Min.   :   1   Min.   : -86.000  
 1st Qu.:  -5.00   1st Qu.:1104   1st Qu.:1124   1st Qu.: -17.000  
 Median :  -2.00   Median :1535   Median :1556   Median :  -5.000  
 Mean   :  12.64   Mean   :1502   Mean   :1536   Mean   :   6.895  
 3rd Qu.:  11.00   3rd Qu.:1940   3rd Qu.:1945   3rd Qu.:  14.000  
 Max.   :1301.00   Max.   :2400   Max.   :2359   Max.   :1272.000  
 NA's   :8255      NA's   :8713                  NA's   :9430      
   carrier              flight       tailnum             origin         
 Length:336776      Min.   :   1   Length:336776      Length:336776     
 Class :character   1st Qu.: 553   Class :character   Class :character  
 Mode  :character   Median :1496   Mode  :character   Mode  :character  
                    Mean   :1972                                        
                    3rd Qu.:3465                                        
                    Max.   :8500                                        
                                                                        
     dest              air_time        distance         hour           minute     
 Length:336776      Min.   : 20.0   Min.   :  17   Min.   : 1.00   Min.   : 0.00  
 Class :character   1st Qu.: 82.0   1st Qu.: 502   1st Qu.: 9.00   1st Qu.: 8.00  
 Mode  :character   Median :129.0   Median : 872   Median :13.00   Median :29.00  
                    Mean   :150.7   Mean   :1040   Mean   :13.18   Mean   :26.23  
                    3rd Qu.:192.0   3rd Qu.:1389   3rd Qu.:17.00   3rd Qu.:44.00  
                    Max.   :695.0   Max.   :4983   Max.   :23.00   Max.   :59.00  
                    NA's   :9430                                                  
   time_hour                  
 Min.   :2013-01-01 05:00:00  
 1st Qu.:2013-04-04 13:00:00  
 Median :2013-07-03 10:00:00  
 Mean   :2013-07-03 05:22:54  
 3rd Qu.:2013-10-01 07:00:00  
 Max.   :2013-12-31 23:00:00  
                              

5.3 Arrange rows with arrange()

5.3.1

How could you use arrange() to sort all missing values to the start? (Hint: use is.na()).

5.3.2

Sort flights to find the most delayed flights. Find the flights that left earliest

# Largest departure delay first.
flights %>%
  arrange(desc(dep_delay))
# Smallest (most negative) departure delay first, i.e. left earliest.
flights %>%
  arrange(dep_delay)

5.3.3

Sort flights to find the fastest (highest speed) flights.

5.3.4

Which flights travelled the farthest? Which travelled the shortest?

# Longest distance first.
flights %>%
  arrange(desc(distance))
# Shortest distance first.
flights %>%
  arrange(distance)

5.4 Select columns with select()

5.4.1

Brainstorm as many ways as possible to select dep_time, dep_delay, arr_time, and arr_delay from flights.

5.4.2

What happens if you include the name of a variable multiple times in a select() call?

5.4.3

What does the any_of() function do? Why might it be helpful in conjunction with this vector?

5.4.4

Does the result of running the following code surprise you? How do the select helpers deal with case by default? How can you change that default?

Default is to ignore case.

# Default matching: "TIME" is matched without regard to case.
flights %>%
  select(contains("TIME"))
# Case-sensitive matching.
flights %>%
  select(contains("TIME", ignore.case = FALSE))

5.5 Add new variables with mutate()

5.5.1

Currently dep_time and sched_dep_time are convenient to look at, but hard to compute with because they’re not really continuous numbers. Convert them to a more convenient representation of number of minutes since midnight.

5.5.2

Compare air_time with arr_time - dep_time. What do you expect to see? What do you see? What do you need to do to fix it?

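Converting both times to minutes since midnight still does not make arr_time - dep_time equal to air_time. Both are local clock times, so time zone differences between origin and destination remain, as do flights that cross midnight and time spent taxiing on the ground.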

5.5.3

Compare dep_time, sched_dep_time, and dep_delay. How would you expect those three numbers to be related?

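In principle dep_time - sched_dep_time == dep_delay, but only once both clock times (stored as HHMM) are converted to minutes since midnight; flights delayed past midnight additionally need a 24-hour (1440-minute) adjustment.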

5.5.4

Find the 10 most delayed flights using a ranking function. How do you want to handle ties? Carefully read the documentation for min_rank().

min_rank() assigns tied values the same rank.

# Small demonstration of how tied values are ranked.
c(10, 5, 1, 5, 5) %>%
  min_rank()
[1] 5 2 1 2 2

5.5.5

What does 1:3 + 1:10 return? Why?

# Exercise expression, kept verbatim: the lengths are 3 and 10, and the
# result below still has 10 elements, accompanied by a warning.
1:3 + 1:10
Warning in 1:3 + 1:10 :
  longer object length is not a multiple of shorter object length
 [1]  2  4  6  5  7  9  8 10 12 11

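R recycles the shorter vector to the length of the longer one. When the longer length is not a multiple of the shorter, the addition is still performed (as above) but R issues a warning; when it is a multiple, the recycling is silent.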

# Lengths 2 and 10: the result below has 10 elements and no warning is shown.
1:2 + 1:10
 [1]  2  4  4  6  6  8  8 10 10 12

5.5.6

What trigonometric functions does R provide?

These can be viewed in ?Trig documentation.
- cos(x), sin(x), tan(x)
- acos(x), asin(x), atan(x), atan2(y, x)
- cospi(x), sinpi(x), tanpi(x)

LS0tDQp0aXRsZTogIkNoYXB0ZXIgNSBleGVyY2lzZXM6IERhdGEgdHJhbnNmb3JtYXRpb24iDQpvdXRwdXQ6IA0KICBodG1sX25vdGVib29rOg0KICAgIHRvYzogVFJVRQ0KLS0tDQpgYGB7cn0NCmxpYnJhcnkobnljZmxpZ2h0czEzKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KGRwbHlyKQ0KYGBgDQoNCiMjIDUuMiBGaWx0ZXIgcm93cyB3aXRoIGZpbHRlcigpDQoNCiMjIyA1LjIuMQ0KPiBGaW5kIGFsbCBmbGlnaHRzIHRoYXQ6DQoNCj4gMS4gSGFkIGFuIGFycml2YWwgZGVsYXkgb2YgdHdvIG9yIG1vcmUgaG91cnMNCg0KYGBge3J9DQpmaWx0ZXIoZmxpZ2h0cywgYXJyX2RlbGF5ID49IDIpDQpgYGANCj4gMi4gRmxldyB0byBIb3VzdG9uIChJQUggb3IgSE9VKQ0KDQpgYGB7ciwgZXZhbD1GQUxTRX0NCmZpbHRlcihmbGlnaHRzLCBkZXN0ID09ICJJQUgiIHwgZGVzdCA9PSAiSE9VIikNCmBgYCAgDQpvciAgDQoNCmBgYHtyLCBldmFsPSBUUlVFfQ0KZmlsdGVyKGZsaWdodHMsIGRlc3QgJWluJSBjKCJJQUgiLCAiSE9VIikpDQpgYGANCj4gMy4gV2VyZSBvcGVyYXRlZCBieSBVbml0ZWQsIEFtZXJpY2FuLCBvciBEZWx0YSAgDQoNCmBgYHtyfQ0KZmlsdGVyKGZsaWdodHMsIGNhcnJpZXIgJWluJSBjKCJVQSIsICJBQSIsICJETCIpKQ0KYGBgDQo+IDQuIERlcGFydGVkIGluIHN1bW1lciAoSnVseSwgQXVndXN0LCBhbmQgU2VwdGVtYmVyKQ0KDQpgYGB7cn0NCmZpbHRlcihmbGlnaHRzLCBtb250aCAlaW4lIGMoNywgOCwgOSkpDQpgYGANCj4gNS4gQXJyaXZlZCBtb3JlIHRoYW4gdHdvIGhvdXJzIGxhdGUsIGJ1dCBkaWRu4oCZdCBsZWF2ZSBsYXRlICANCg0KYGBge3J9DQpmaWx0ZXIoZmxpZ2h0cywgZGVwX2RlbGF5IDw9IDAgJiBhcnJfdGltZSA+PSAxMjApDQpgYGANCj4gNi4gV2VyZSBkZWxheWVkIGJ5IGF0IGxlYXN0IGFuIGhvdXIsIGJ1dCBtYWRlIHVwIG92ZXIgMzAgbWludXRlcyBpbiBmbGlnaHQNCg0KYGBge3J9DQpmaWx0ZXIoZmxpZ2h0cywgZGVwX2RlbGF5ID49IDYwICYgZGVwX2RlbGF5IC0gYXJyX2RlbGF5ID4zMCApDQpgYGANCj4gNy4gRGVwYXJ0ZWQgYmV0d2VlbiBtaWRuaWdodCBhbmQgNmFtIChpbmNsdXNpdmUpICANCg0KYGBge3J9DQpzdW1tYXJ5KGZsaWdodHMkZGVwX3RpbWUpDQpgYGANCg0KDQpgYGB7ciwgZXZhbD1UUlVFfQ0KZmlsdGVyKGZsaWdodHMsIGRlcF90aW1lICUlIDI0MDAgPD0gNjAwKQ0KYGBgDQojIyMgNS4yLjINCj4gQW5vdGhlciB1c2VmdWwgZHBseXIgZmlsdGVyaW5nIGhlbHBlciBpcyBgYmV0d2VlbigpYC4gV2hhdCBkb2VzIGl0IGRvPyBDYW4geW91IHVzZSBpdCB0byBzaW1wbGlmeSB0aGUgY29kZSBuZWVkZWQgdG8gYW5zd2VyIHRoZSBwcmV2aW91cyBjaGFsbGVuZ2VzPw0KDQpUaGlzIGlzIGEgc2hvcnRjdXQgZm9yIHggPj0gbGVmdCAmIHggPD0gcmlnaHQgIA0KYGJldHdlZW4odmVjdG9yIG9mIHZhbHVlcywgbGVmdCBib3VuZGFyeSwgcmlnaHQgYm91bmRhcnkpYA0KDQpgYGB7
ciwgZWNobz1UUlVFfQ0KZmlsdGVyKGZsaWdodHMsIGJldHdlZW4obW9udGgsIDcsIDkpKQ0KYGBgDQojIyMgNS4yLjMNCg0KPiBIb3cgbWFueSBmbGlnaHRzIGhhdmUgYSBtaXNzaW5nIGBkZXBfdGltZWA/IFdoYXQgb3RoZXIgdmFyaWFibGVzIGFyZSBtaXNzaW5nPyBXaGF0IG1pZ2h0IHRoZXNlIHJvd3MgcmVwcmVzZW50Pw0KDQpgYGB7cn0NCmNvdW50KGZsaWdodHMsIGlzLm5hKGRlcF90aW1lKSkNCmBgYA0KDQpgYGB7cn0NCnN1bW1hcnkoZmxpZ2h0cykNCmBgYA0KIyMgNS4zIEFycmFuZ2Ugcm93cyB3aXRoIGFycmFuZ2UoKQ0KDQojIyMgNS4zLjENCj4gSG93IGNvdWxkIHlvdSB1c2UgYGFycmFuZ2UoKWAgdG8gc29ydCBhbGwgbWlzc2luZyB2YWx1ZXMgdG8gdGhlIHN0YXJ0PyAoSGludDogdXNlIGBpcy5uYSgpKWAuICANCg0KYGBge3J9DQphcnJhbmdlKGZsaWdodHMsIGRlc2MoaXMubmEoZGVwX3RpbWUpKSkNCmBgYA0KIyMjIDUuMy4yIA0KPiBTb3J0IGBmbGlnaHRzYCB0byBmaW5kIHRoZSBtb3N0IGRlbGF5ZWQgZmxpZ2h0cy4gRmluZCB0aGUgZmxpZ2h0cyB0aGF0IGxlZnQgZWFybGllc3QNCg0KYGBge3J9DQphcnJhbmdlKGZsaWdodHMsIGRlc2MoZGVwX2RlbGF5KSkNCmFycmFuZ2UoZmxpZ2h0cywgZGVwX2RlbGF5KQ0KYGBgDQojIyMgNS4zLjMNCj4gU29ydCBmbGlnaHRzIHRvIGZpbmQgdGhlIGZhc3Rlc3QgKGhpZ2hlc3Qgc3BlZWQpIGZsaWdodHMuDQoNCmBgYHtyfQ0KYXJyYW5nZShmbGlnaHRzLCBkZXNjKGRpc3RhbmNlIC8gKChob3VyICogNjApICsgbWludXRlKSkpDQpgYGANCiMjIyA1LjMuNA0KPiBXaGljaCBmbGlnaHRzIHRyYXZlbGxlZCB0aGUgZmFydGhlc3Q/IFdoaWNoIHRyYXZlbGxlZCB0aGUgc2hvcnRlc3Q/DQoNCmBgYHtyfQ0KYXJyYW5nZShmbGlnaHRzLCBkZXNjKGRpc3RhbmNlKSkNCmFycmFuZ2UoZmxpZ2h0cywgZGlzdGFuY2UpDQpgYGANCg0KIyMgNS40IFNlbGVjdCBjb2x1bXMgd2l0aCBzZWxlY3QoKQ0KDQojIyMgNS40LjENCg0KPkJyYWluc3Rvcm0gYXMgbWFueSB3YXlzIGFzIHBvc3NpYmxlIHRvIHNlbGVjdCBkZXBfdGltZSwgZGVwX2RlbGF5LCBhcnJfdGltZSwgYW5kIGFycl9kZWxheSBmcm9tIGZsaWdodHMuDQoNCmBgYHtyfQ0Kc2VsZWN0KGZsaWdodHMsIGRlcF90aW1lLCBkZXBfZGVsYXksIGFycl90aW1lLCBhcnJfZGVsYXkpDQpzZWxlY3QoZmxpZ2h0cywgc3RhcnRzX3dpdGgoJ2RlcCcpLCBzdGFydHNfd2l0aCgnYXJyJykpDQpzZWxlY3QoZmxpZ2h0cywgNCwgNiwgNywgOSkNCg0KYGBgDQojIyMgNS40LjINCj5XaGF0IGhhcHBlbnMgaWYgeW91IGluY2x1ZGUgdGhlIG5hbWUgb2YgYSB2YXJpYWJsZSBtdWx0aXBsZSB0aW1lcyBpbiBhIHNlbGVjdCgpIGNhbGw/DQoNCmBgYHtyfQ0Kc2VsZWN0KGZsaWdodHMsIGRlcF90aW1lLCBkZXBfdGltZSkNCmBgYA0KIyMjIDUuNC4zDQoNCj5XaGF0IGRvZXMgdGhlIGFueV9vZigpIGZ1bmN0aW9uIGRvPyBXaHkgbWlnaHQgaXQgYmUgaGVscGZ1
bCBpbiBjb25qdW5jdGlvbiB3aXRoIHRoaXMgdmVjdG9yPw0KDQpgYGB7cn0NCnZhcnMgPC0gYygieWVhciIsICJtb250aCIsICJkYXkiLCAiZGVwX2RlbGF5IiwgImFycl9kZWxheSIpDQpmbGlnaHRzICU+JSBzZWxlY3QoYW55X29mKHZhcnMpKQ0KYGBgDQojIyMgNS40LjQNCg0KPiBEb2VzIHRoZSByZXN1bHQgb2YgcnVubmluZyB0aGUgZm9sbG93aW5nIGNvZGUgc3VycHJpc2UgeW91PyBIb3cgZG8gdGhlIHNlbGVjdCBoZWxwZXJzIGRlYWwgd2l0aCBjYXNlIGJ5IGRlZmF1bHQ/IEhvdyBjYW4geW91IGNoYW5nZSB0aGF0IGRlZmF1bHQ/DQoNCkRlZmF1bHQgaXMgdG8gaWdub3JlIGNhc2UuIA0KDQpgYGB7cn0NCnNlbGVjdChmbGlnaHRzLCBjb250YWlucygiVElNRSIpKQ0Kc2VsZWN0KGZsaWdodHMsIGNvbnRhaW5zKCJUSU1FIiwgaWdub3JlLmNhc2UgPSBGQUxTRSkpDQpgYGANCiMjIDUuNSBBZGQgbmV3IHZhcmlhYmxlcyB3aXRoIGBtdXRhdGUoKWANCg0KIyMjIDUuNS4xDQoNCj4gQ3VycmVudGx5IGBkZXBfdGltZWAgYW5kIGBzY2hlZF9kZXBfdGltZWAgYXJlIGNvbnZlbmllbnQgdG8gbG9vayBhdCwgYnV0IGhhcmQgdG8gY29tcHV0ZSB3aXRoIGJlY2F1c2UgdGhleeKAmXJlIG5vdCByZWFsbHkgY29udGludW91cyBudW1iZXJzLiBDb252ZXJ0IHRoZW0gdG8gYSBtb3JlIGNvbnZlbmllbnQgcmVwcmVzZW50YXRpb24gb2YgbnVtYmVyIG9mIG1pbnV0ZXMgc2luY2UgbWlkbmlnaHQuDQoNCmBgYHtyfQ0KdHJhbnNtdXRlKGZsaWdodHMsDQogICAgICAgICAgZGVwX3RpbWUgPSAoZGVwX3RpbWUgJS8lIDEwMCAqIDYwICsgZGVwX3RpbWUgJSUgMTAwKSAlJSAxNDQwLA0KICAgICAgICAgIHNjaGVkX2RlcF90aW1lID0gKHNjaGVkX2RlcF90aW1lICUvJSAxMDAgKiA2MCArIHNjaGVkX2RlcF90aW1lICUlIDEwMCkgJSUgMTQwMCkNCmBgYA0KIyMjIDUuNS4yDQoNCj4gQ29tcGFyZSBgYWlyX3RpbWVgIHdpdGggYGFycl90aW1lIC0gZGVwX3RpbWVgLiBXaGF0IGRvIHlvdSBleHBlY3QgdG8gc2VlPyBXaGF0IGRvIHlvdSBzZWU/IFdoYXQgZG8geW91IG5lZWQgdG8gZG8gdG8gZml4IGl0Pw0KDQpgYGB7cn0NCnNlbGVjdChtdXRhdGUoZmxpZ2h0cywgYXJyX2RlcF90aW1lID0gYXJyX3RpbWUgLSBkZXBfdGltZSksIGFpcl90aW1lLCBhcnJfZGVwX3RpbWUpDQoNCm11dGF0ZShmbGlnaHRzLCANCiAgICAgICBhcnJfdGltZSA9IChhcnJfdGltZSAlLyUgMTAwICogNjAgKyBhcnJfdGltZSAlJSAxMDApICUlIDE0NDAsDQogICAgICAgZGVwX3RpbWUgPSAoZGVwX3RpbWUgJS8lIDEwMCAqIDYwICsgZGVwX3RpbWUgJSUgMTAwKSAlJSAxNDQwKSAlPiUgDQogIHRyYW5zbXV0ZShhaXJfdGltZSwgYXJyX2RlcF90aW1lID0gYXJyX3RpbWUgLSBkZXBfdGltZSkNCmBgYA0KU3RpbGwgZG9lc24ndCBzb2x2ZSwgb3RoZXIgdmFyaWFibGVzIGNvdWxkIGJlIHRpbWUgem9uZSBkaWZmZXJlbmNlcy4NCg0KIyMjIDUuNS4zDQoNCj4gQ29tcGFyZSBkZXBfdGlt
ZSwgc2NoZWRfZGVwX3RpbWUsIGFuZCBkZXBfZGVsYXkuIEhvdyB3b3VsZCB5b3UgZXhwZWN0IHRob3NlIHRocmVlIG51bWJlcnMgdG8gYmUgcmVsYXRlZD8NCg0KYGBge3J9DQpzZWxlY3QoZmxpZ2h0cywgZGVwX3RpbWUsIHNjaGVkX2RlcF90aW1lLCBkZXBfZGVsYXkpDQpgYGANCmBkZXBfdGltZSAtIHNjaGVkX2RlcF90aW1lID09IGRlcF9kZWxheWANCg0KIyMjIDUuNS40DQoNCj4gRmluZCB0aGUgMTAgbW9zdCBkZWxheWVkIGZsaWdodHMgdXNpbmcgYSByYW5raW5nIGZ1bmN0aW9uLiBIb3cgZG8geW91IHdhbnQgdG8gaGFuZGxlIHRpZXM/IENhcmVmdWxseSByZWFkIHRoZSBkb2N1bWVudGF0aW9uIGZvciBtaW5fcmFuaygpLg0KDQpgbWluX3JhbmsoKWAgYXNzaWducyB0aWVkIHZhbHVlcyB0aGUgc2FtZSByYW5rLiANCmBgYHtyfQ0KbWluX3JhbmsoYygxMCwgNSwgMSwgNSwgNSkpIA0KYGBgDQoNCmBgYHtyfQ0KbXV0YXRlKGZsaWdodHMsIGRlcF9kZWxheV9taW5fcmFuayA9IG1pbl9yYW5rKGRlc2MoZGVwX2RlbGF5KSkpICU+JSANCiAgYXJyYW5nZShkZXBfZGVsYXlfbWluX3JhbmspDQpgYGANCiMjIyA1LjUuNQ0KDQo+IFdoYXQgZG9lcyBgMTozICsgMToxMGAgcmV0dXJuPyBXaHk/DQoNCmBgYHtyLCB3YXJuaW5nPVRSVUV9DQoxOjMgKyAxOjEwDQpgYGANCllvdSBjYW4gb25seSBhZGQgdmVjdG9ycyBvZiBkaWZmZXJlbnQgbGVuZ3RocyBpZiBvbmUgaXMgYSBtdWx0aXBsZSBvZiBhbm90aGVyLg0KYGBge3J9DQoxOjIgKyAxOjEwDQpgYGANCiMjIyA1LjUuNg0KDQo+IFdoYXQgdHJpZ29ub21ldHJpYyBmdW5jdGlvbnMgZG9lcyBSIHByb3ZpZGU/DQoNClRoZXNlIGNhbiBiZSB2aWV3ZWQgaW4gYD9UcmlnYCBkb2N1bWVudGF0aW9uLiAgDQotIGBjb3MoeClgLCBgc2luKHgpYCwgYHRhbih4KWANCi0gYGFjb3MoeClgLCBgYXNpbih4KWAsIGBhdGFuKHgpYCwgYGF0YW4yKHksIHgpYCANCi0gYGNvc3BpKHgpYCwgYHNpbnBpKHgpYCwgYHRhbnBpKHgpYA0K